---
title: "R Notebook"
output: html_notebook
---

#Load Data and run DESEQ2
```{r}
library(DESeq2)
library("magrittr")
library("dplyr")
library("ggplot2")
library("reshape2")
library("RColorBrewer")
library("gplots")
library("ggbio")

matrix = read.table("larvae.counts.matrix")
samples = read.table("larvae.txt", sep="\t", header=TRUE, row.names = 1)
samples$ID = rownames(samples)

# remove all rows with less than 10 total counts
larvae_new = matrix %>% mutate(Total = dplyr::select(., L1:YBB2) %>% rowSums(na.rm = TRUE))
rownames(larvae_new) = rownames(matrix)
larvae_new = subset(larvae_new, larvae_new[ , 29] > 10, drop=FALSE ) 
larvae_new = larvae_new[,1:28]

matrix = as.matrix(larvae_new)
matrix2 = round(matrix)


dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Genotype + Population)

#remove L3v1, a technical replicate

dds <- dds[,-11]

#remove SKBB1, a spurious sample
dds <- dds[,-24]


dds$Genotype <- relevel(dds$Genotype, ref = "BB")


dds = DESeq(dds)

resultsNames(dds)
plotDispEsts(dds)

```

#pull out DE genes according the genotype
```{r}
res_geno_AAvsBB <- results(dds, contrast=c("Genotype","AA","BB"), alpha=0.05) %>% as.data.frame
resx_geno_AAvsBB <- res_geno_AAvsBB %>% subset(padj < 0.05) %>% as.data.frame

sig_AABB = resx_geno_AAvsBB[,c(2,6)]
sig_AABB$contig = rownames(sig_AABB)

res_geno_ABvsBB <- results(dds, contrast=c("Genotype","AB","BB"), alpha=0.05)  %>% as.data.frame
resx_geno_ABvsBB <- res_geno_ABvsBB %>% subset(padj < 0.05) %>% as.data.frame

sig_ABBB = resx_geno_ABvsBB[,c(2,6)]
sig_ABBB$contig = rownames(sig_ABBB)



res_geno_AAvsAB <- results(dds, contrast=c("Genotype","AB","AA"), alpha=0.05)  %>% as.data.frame
resx_geno_AAvsAB <- res_geno_AAvsAB %>% subset(padj < 0.05) %>% as.data.frame

sig_AAAB = resx_geno_AAvsAB[,c(2,6)]
sig_AAAB$contig = rownames(sig_AAAB)


#merge results
resx_geno_AAvsBB$test = "AAvsBB"
resx_geno_ABvsBB$test = "ABvsBB"
resx_geno_AAvsAB$test = "AAvsAB"



resx = rbind(resx_geno_AAvsBB, resx_geno_ABvsBB, resx_geno_AAvsAB)
resx = resx[!duplicated(resx$baseMean), ]

sig_geno = resx[,c(2,6)]
sig_geno$contig = rownames(sig_geno)


colnames(res_geno_AAvsAB) = c("baseMean_AAvsAB" ,      "log2FoldChange_AAvsAB" ,"lfcSE_AAvsAB"  ,        "stat_AAvsAB"   ,        "pvalue_AAvsAB"        , "padj_AAvsAB" )  

colnames(res_geno_AAvsBB) = c("baseMean_AAvsBB" ,      "log2FoldChange_AAvsBB" ,"lfcSE_AAvsBB"  ,        "stat_AAvsBB"   ,        "pvalue_AAvsBB"        , "padj_AAvsBB" )

colnames(res_geno_ABvsBB) = c("baseMean_ABvsBB" ,      "log2FoldChange_ABvsBB" ,"lfcSE_ABvsBB"  ,        "stat_ABvsBB"   ,        "pvalue_ABvsBB"        , "padj_ABvsBB" )

res_geno_AAvsAB = as.data.frame(res_geno_AAvsAB)
res_geno_AAvsAB$contig = rownames(res_geno_AAvsAB)
res_geno_AAvsAB$contig = as.character(res_geno_AAvsAB$contig)
good = cbind(res_geno_AAvsAB$contig,res_geno_AAvsAB$log2FoldChange_AAvsAB,res_geno_AAvsAB$pvalue_AAvsAB,res_geno_AAvsAB$padj_AAvsAB,res_geno_ABvsBB$log2FoldChange_ABvsBB,res_geno_ABvsBB$pvalue_ABvsBB,res_geno_ABvsBB$padj_ABvsBB, res_geno_AAvsBB$log2FoldChange_AAvsBB,res_geno_AAvsBB$pvalue_AAvsBB,res_geno_AAvsBB$padj_AAvsBB)
good= as.data.frame(good)
colnames(good) = c("contig", "log2FoldChange_AAvsAB", "pval_AAvsAB", "padj_AAvsAB","log2FoldChange_ABvsBB","pvalue_ABvsBB","padj_ABvsBB", "log2FoldChange_AAvsBB","pvalue_AAvsBB", "padj_AAvsBB")
good = subset(good, good$contig %in% rownames(resx))

```


#look at inheritance of sig DE genes
```{r}
exp = as.data.frame(counts(dds, normalized=TRUE))
exp = subset(exp,rownames(exp) %in% rownames(resx))
exp = t(exp)

all = merge(samples,exp, by=0)


for (i in seq(9,dim(all)[2])) {

contig = colnames(all[i])
print(contig)
aux<- all[, c(contig, "Genotype")]
colnames(aux)<-c("transcript", "Genotype") 
print(ggplot(data=aux, aes(y=transcript, x=Genotype)) + geom_boxplot() +geom_jitter(shape=16, width = 0.2))
  
  
}

ggplot(data=all, aes(y=P13F_TRINITY_DN16250_c0_g1_i1
                     , x=Genotype)) + geom_boxplot() +geom_jitter(shape=16, width = 0.2) 

```



#topGO
```{r}
library(topGO)
library(splitstackshape)



#Read in GO mappings for tested contigs

geneID2GO = readMappings("larvae_tested_go.txt")

geneNames = names(geneID2GO)

#for AA vs BB
AABB = rownames(sig_AABB)

geneList = factor(as.integer(geneNames %in% AABB))
names(geneList) <- geneNames



GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList,
                    annot = annFUN.gene2GO, gene2GO = geneID2GO)

resultTopGO.elim <- runTest(GOdata, algorithm = "elim", statistic = "Fisher" )
allRes_AABB <- GenTable(GOdata, elimKS = resultTopGO.elim,
                   orderBy = "elimKS", 
                   topNodes = length(geneList))

allRes_AABB$padjust<-p.adjust(allRes_AABB$elimKS,method="BH")

#for AB vs. BB
ABBB = rownames(sig_ABBB)

geneList = factor(as.integer(geneNames %in% ABBB))
names(geneList) <- geneNames



GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList,
                    annot = annFUN.gene2GO, gene2GO = geneID2GO)

resultTopGO.elim <- runTest(GOdata, algorithm = "elim", statistic = "Fisher" )
allRes_ABBB <- GenTable(GOdata, elimKS = resultTopGO.elim,
                   orderBy = "elimKS", 
                   topNodes = length(geneList))

allRes_ABBB$padjust<-p.adjust(allRes_ABBB$elimKS,method="BH")


#for AA vs. AB
AAAB = rownames(sig_AAAB)

geneList = factor(as.integer(geneNames %in% AAAB))
names(geneList) <- geneNames



GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList,
                    annot = annFUN.gene2GO, gene2GO = geneID2GO)

resultTopGO.elim <- runTest(GOdata, algorithm = "elim", statistic = "Fisher" )
allRes_AAAB <- GenTable(GOdata, elimKS = resultTopGO.elim,
                   orderBy = "elimKS", 
                   topNodes = length(geneList))

allRes_AAAB$padjust<-p.adjust(allRes_AAAB$elimKS,method="BH")



#for_combined AA vs. BB AB vs. BB and AA vs. AB
all = rownames(sig_geno)

geneList = factor(as.integer(geneNames %in% all))
names(geneList) <- geneNames



GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList,
                    annot = annFUN.gene2GO, gene2GO = geneID2GO)

resultTopGO.elim <- runTest(GOdata, algorithm = "elim", statistic = "Fisher" )
allRes_larvae_geno <- GenTable(GOdata, elimKS = resultTopGO.elim,
                   orderBy = "elimKS", 
                   topNodes = length(geneList))

allRes_larvae_geno$padjust<-p.adjust(allRes_larvae_geno$elimKS,method="BH")

```





#get position info
```{r}
positions = read.table("/Users/emmaberdan/Documents/RNA study/New Transcriptome/positions.csv", header=TRUE, sep=",")


```

#look at overall sample relationships using matrix and pca
```{r}
vsd <- vst(dds, blind=FALSE)


greeks=c(alpha='\u03b1', tau='\u03c4', sigma='\u03c3',
                         beta='\u03b2',
                         gamma='\u03b3')

greeks2 = c(expression(paste(beta,beta)), expression(paste(alpha,alpha)),expression(paste(alpha,beta)))


levels(vsd$Genotype)  = c(paste0(greeks['beta'],greeks['beta']),paste0(greeks['alpha'],greeks['alpha']),paste0(greeks['alpha'],greeks['beta']))


library("pheatmap")

sampleDists <- dist(t(assay(vsd)), method = "manhattan")

library(vegan)

samples4 = samples[-c(11,25),]
adonis2(formula= sampleDists ~ samples4$Genotype+ samples4$Population, method = "manhattan" )

library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- vsd$Genotype
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)



ggplotColours <- function(n = 6, h = c(0, 360) + 15){
  if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n
  hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}


pcaData <- plotPCA(vsd, intgroup=c("Genotype"), returnData=TRUE, ntop=15859 )
percentVar <- round(100 * attr(pcaData, "percentVar"))
pcaData$Genotype = ordered(pcaData$Genotype, levels=c("AA", "BB", "AB"))

good = ggplot(pcaData, aes(PC1, PC2, color=Genotype)) +
  geom_point(size=4) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + scale_colour_manual(values=ggplotColours(n=3),labels=greeks2) +
  coord_fixed()


cairo_pdf("pca_larvae.pdf",width=6,height=6)
good
dev.off()



pcaData <- plotPCA(vsd, intgroup=c("Population", "Genotype"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))

pop = ggplot(pcaData, aes(PC1, PC2, color=Population, shape=Genotype)) +
  geom_point(size=4) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance"))  + scale_shape_manual(values=c(15,16,17), labels=greeks2) +
  coord_fixed()

cairo_pdf("pca_pop_larvae.pdf",width=8,height=8)
pop
dev.off()


```

#MA and volcano for genotype for larvae
```{r}
resLFC_geno <- lfcShrink(dds, coef="Genotype_AA_vs_BB", independentFiltering=TRUE,cooksCutoff=TRUE)
plotMA(resLFC_geno,0.05, ylim=c(-15,15))
library("EnhancedVolcano")
geno = EnhancedVolcano(resLFC_geno,
                lab= rownames(resLFC_geno),
    x = 'log2FoldChange',
    y = 'pvalue',
    pCutoff = 10e-4,
    xlim = c(-6, 6))

pdf("geno_larvae_AAvsBB_volcano.pdf",width=16,height=8)
geno
dev.off()






resLFC_geno2 <- lfcShrink(dds, coef="Genotype_AB_vs_BB", independentFiltering=TRUE,cooksCutoff=TRUE)
plotMA(resLFC_geno2,0.05, ylim=c(-15,15))
library("EnhancedVolcano")
geno2 = EnhancedVolcano(resLFC_geno2,
                lab= rownames(resLFC_geno2),
    x = 'log2FoldChange',
    y = 'pvalue',
    pCutoff = 10e-4,
    xlim = c(-5, 5))

pdf("geno_larvae_ABvsBB_volcano.pdf",width=16,height=8)
geno2
dev.off()



```




#DESEQ analysis using only Skeie
```{r}
library(DESeq2)
library("magrittr")
library("dplyr")
library("ggplot2")
library("reshape2")
library("RColorBrewer")
library("gplots")
library("ggbio")

matrix = read.table("/Users/emmaberdan/Documents/RNA study/Rsem_reducedref/larvae.counts.matrix")
samples = read.table("/Users/emmaberdan/Documents/RNA study/larvae.txt", sep="\t", header=TRUE, row.names = 1)
samples$ID = rownames(samples)

# remove all rows with less than 10 total counts
larvae_new = matrix %>% mutate(Total = dplyr::select(., L1:YBB2) %>% rowSums(na.rm = TRUE))
rownames(larvae_new) = rownames(matrix)
larvae_new = subset(larvae_new, larvae_new[ , 29] > 10, drop=FALSE ) 
larvae_new = larvae_new[,1:28]

matrix = as.matrix(larvae_new)
matrix2 = round(matrix)


dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ genotype)
#remove all non-skeie L1, L21, L23, L3v2

dds <- dds[,c(1,6,7,12,23,24,25,26)]
dds <- dds[,-7]


dds$genotype <- relevel(dds$genotype, ref = "BB")

dds = DESeq(dds)

resultsNames(dds)
plotDispEsts(dds)

```


#look at overall sample relationships using matrix and pca
```{r}
vsd_sk <- vst(dds, blind=FALSE)
rld_sk <- rlog(dds, blind=FALSE)

library("pheatmap")
sampleDists <- dist(t(assay(vsd_sk)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- vsd_sk$ID
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)

plotPCA(vsd_sk, intgroup=c("genotype"))


greeks = c(expression(paste(beta,beta)), expression(paste(alpha,alpha)), expression(paste(alpha,beta)))

pcaData <- plotPCA(vsd_sk, intgroup=c("genotype"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))

pop = ggplot(pcaData, aes(PC1, PC2, color=genotype)) +
  geom_point(size=4) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance"))  + scale_color_manual(values=ggplotColours(n=3), labels=greeks) +
  coord_fixed()

cairo_pdf("pca_skeie_larvae.pdf",width=8,height=8)
pop
dev.off()


```



Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Cmd+Option+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file). 

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

